import fasttext
import re
import mysql.connector
from sklearn import metrics
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import classification_report
import numpy as np
import math



def dbConnection():
    """Open a MySQL connection and return it together with a fresh cursor.

    The connection parameters are empty strings in this file (placeholders,
    presumably to be filled in before use).

    Returns:
        A ``(connection, cursor)`` tuple. Autocommit is switched on for the
        connection after the cursor has been created.
    """
    connection_settings = {
        "host": "",
        "user": "",
        "passwd": "",
        "database": "",
    }
    connection = mysql.connector.connect(**connection_settings)

    cursor = connection.cursor()
    connection.autocommit = True

    return connection, cursor


def finish(mydb, mycursor):
    """Close the cursor and then the database connection.

    Args:
        mydb: Connection object exposing ``close()``.
        mycursor: Cursor object exposing ``close()``.

    An exception raised while closing the cursor still propagates to the
    caller, but the connection is closed first so it is not leaked.
    """
    try:
        mycursor.close()
    finally:
        # Always release the connection, even if closing the cursor failed.
        mydb.close()

def createModel(trainfile,modelN,wordEmb):
    """Train a supervised fastText classifier and save it to disk.

    Args:
        trainfile: Path of the training file, passed to fastText as ``input``.
        modelN: Path the trained model is saved to.
        wordEmb: Value passed to fastText as ``pretrainedVectors``.
    """
    hyper_params = dict(
        lr=0.25,
        epoch=25,
        wordNgrams=2,
        bucket=20000,
        dim=300,
        thread=2,
        loss='softmax',
        pretrainedVectors=wordEmb,
    )
    classifier = fasttext.train_supervised(input=trainfile, **hyper_params)
    classifier.save_model(modelN)
    print("Model trained with the hyperparameter \n {}".format(hyper_params))

def getPredictions(testFile,modelN):
    """Predict a label for every labelled sentence in a fastText-format file.

    Each non-blank line of ``testFile`` is expected to contain exactly one
    ``__label__<int>`` marker plus the sentence text. Blank lines (and an
    empty file) are skipped instead of failing on ``int('')``.

    Args:
        testFile: Path of the labelled test file (read as UTF-8).
        modelN: Path of the saved fastText model to load.

    Returns:
        Dict mapping each stripped sentence to
        ``[actual_label, predicted_label]`` (both ints). Identical sentences
        share one key, so a later occurrence overwrites an earlier one.

    Raises:
        ValueError: If a non-blank line does not contain exactly one integer
            label, or the model returns no usable label for a sentence.
    """
    model = fasttext.load_model(modelN)
    # Raw string: '\d' in a normal literal is an invalid escape sequence.
    label_pattern = re.compile(r'__label__\d+')

    sentences_with_labels = {}
    # fastText works on UTF-8 text, so do not rely on the platform default.
    with open(testFile, 'r', encoding='utf-8') as f:
        for line_number, sent_label in enumerate(f, start=1):
            if not sent_label.strip():
                # Nothing to evaluate on a blank line (e.g. trailing newline).
                continue

            found_labels = label_pattern.findall(sent_label)
            if len(found_labels) != 1:
                raise ValueError(
                    "line {} of {!r}: expected exactly one '__label__<int>' "
                    "marker, found {}".format(line_number, testFile, len(found_labels)))
            actuallabel = int(found_labels[0].replace('__label__', ''))

            # model.predict rejects text containing a newline, so drop it.
            sent = label_pattern.sub('', sent_label).replace('\n', '')

            predicted_labels = model.predict(sent, k=1)[0]
            if not predicted_labels:
                raise ValueError(
                    "line {} of {!r}: model returned no prediction".format(
                        line_number, testFile))
            predicted_label = int(predicted_labels[0].replace('__label__', '').strip())

            sentences_with_labels[sent.strip()] = [actuallabel, predicted_label]

    return sentences_with_labels


def evaluateModel(sentences_with_labels):
    """Print a classification report plus micro/macro scores and return them.

    Args:
        sentences_with_labels: Dict whose values are
            ``[actual_label, predicted_label]`` pairs.

    Returns:
        ``(microres, macro)``: the results of
        ``precision_recall_fscore_support`` with ``average='micro'`` and
        ``average='macro'`` respectively.
    """
    # Display names for the report rows; a six-class setup would add '5'.
    labels = [str(class_id) for class_id in range(5)]

    label_pairs = list(sentences_with_labels.values())
    flat_true_labels = [pair[0] for pair in label_pairs]
    flat_predictions = [pair[1] for pair in label_pairs]

    print(classification_report(flat_true_labels, flat_predictions, target_names=labels))

    microres = precision_recall_fscore_support(
        flat_true_labels, flat_predictions, average='micro')
    print("microres: ", microres)

    macro = precision_recall_fscore_support(
        flat_true_labels, flat_predictions, average='macro')
    print("macrores: ", macro)

    return microres, macro


def evaluateBinary(sentences_with_labels):
    """Print precision, recall, F1 and accuracy; return the first three.

    Args:
        sentences_with_labels: Dict whose values are
            ``[actual_label, predicted_label]`` pairs.

    Returns:
        ``(p, r, f1)``. Accuracy is printed but not returned.
    """
    label_pairs = list(sentences_with_labels.values())
    flat_true_labels = [pair[0] for pair in label_pairs]
    flat_predictions = [pair[1] for pair in label_pairs]

    p = metrics.precision_score(flat_true_labels, flat_predictions)
    r = metrics.recall_score(flat_true_labels, flat_predictions)
    f1 = metrics.f1_score(flat_true_labels, flat_predictions)
    a = metrics.accuracy_score(flat_true_labels, flat_predictions)

    print("p: ", p, "r: ", r, "f1: ", f1, "a: ", a)
    return p, r, f1

def variance(data, ddof=0):
    """Return the variance of *data*.

    Args:
        data: Sized collection of numbers.
        ddof: Value subtracted from ``len(data)`` in the divisor
            (0 divides by ``n``, 1 divides by ``n - 1``).

    Raises:
        ZeroDivisionError: If *data* is empty or ``len(data) == ddof``.
    """
    count = len(data)
    mean = sum(data) / count
    squared_deviations = [(value - mean) ** 2 for value in data]
    divisor = count - ddof
    return sum(squared_deviations) / divisor

def stdev(data):
    """Return the square root of ``variance(data)`` (default ``ddof`` of 0)."""
    return math.sqrt(variance(data))

def averageRes(microres1, macro1,microres2, macro2,microres3, macro3):
    """Average three runs' micro and macro scores and compute their spread.

    Each argument is indexed at positions 0, 1 and 2 (used here as precision,
    recall and F-score).

    Returns:
        A 12-tuple of ``(average, stdev)`` pairs, flattened, in the order
        P micro, R micro, F micro, P macro, R macro, F macro.
    """
    def summarize(runs, position):
        # Mean and standard deviation of one score across the three runs.
        first, second, third = (run[position] for run in runs)
        return (first + second + third) / 3, stdev([first, second, third])

    summary = []
    for runs in ((microres1, microres2, microres3), (macro1, macro2, macro3)):
        for position in range(3):
            summary.extend(summarize(runs, position))

    return tuple(summary)

def averageBinary(p1,r1,f11,p2,r2,f12,p3,r3,f13):
    """Average precision, recall and F1 over three runs with their spread.

    Returns:
        ``(avgP, stdP, avgR, stdR, avgF, stdF)``.
    """
    per_metric_runs = (
        (p1, p2, p3),
        (r1, r2, r3),
        (f11, f12, f13),
    )

    summary = []
    for first, second, third in per_metric_runs:
        summary.append((first + second + third) / 3)
        summary.append(stdev([first, second, third]))

    return tuple(summary)




def main():
    """Train, predict and evaluate three fastText models, then print averages.

    For each of the three iterations a model is trained from its own training
    file, evaluated on the shared test file, and its micro/macro scores are
    collected. Afterwards the averages and standard deviations over the three
    iterations are printed.

    The path variables below are empty strings in this file (placeholders,
    presumably to be filled in before running).
    """
    wordEmb = ""
    ftTrainFileIt1 = ""
    ftTestFile = ""
    ftModelIt1 = ""
    ftTrainFileIt2 = ""
    ftModelIt2 = ""
    ftTrainFileIt3 = ""
    ftModelIt3 = ""

    iterations = [
        (ftTrainFileIt1, ftModelIt1),
        (ftTrainFileIt2, ftModelIt2),
        (ftTrainFileIt3, ftModelIt3),
    ]

    # Collected as microres1, macro1, microres2, macro2, microres3, macro3.
    results = []
    for number, (train_file, model_file) in enumerate(iterations, start=1):
        print("----------------------5b10 It{}-------------------------------".format(number))
        # wordEmb is a required argument of createModel; omitting it (as the
        # second and third iterations used to) raises TypeError.
        createModel(train_file, model_file, wordEmb)
        sentences_with_labels = getPredictions(ftTestFile, model_file)
        results.extend(evaluateModel(sentences_with_labels))

    (avgPmicro, stdPmicro, avgRmicro, stdRmicro, avgFmicro, stdFmicro,
     avgPmacro, stdPmacro, avgRmacro, stdRmacro, avgFmacro, stdFmacro) = averageRes(*results)

    print("--------------Average Results Micro------------------------------")
    print("avg P micro: ",str(avgPmicro),"+-("+str(stdPmicro)+")")
    print("avg R micro: ", str(avgRmicro), "+-(" + str(stdRmicro) + ")")
    print("avg F micro: ", str(avgFmicro), "+-(" + str(stdFmicro) + ")")
    print("--------------Average Results Macro------------------------------")
    print("avg P macro: ", str(avgPmacro), "+-(" + str(stdPmacro) + ")")
    print("avg R macro: ", str(avgRmacro), "+-(" + str(stdRmacro) + ")")
    print("avg F macro: ", str(avgFmacro), "+-(" + str(stdFmacro) + ")")
  

# Script entry point: runs the full three-iteration experiment. The call is
# unguarded, so it also executes when this module is imported.
main()

